library(tidyverse)
library(tidycensus)
library(tidytransit)
library(tigris)
library(sf)
library(kableExtra)
library(gt)

source("https://raw.githubusercontent.com/urbanSpatial/Public-Policy-Analytics-Landing/master/functions.r")

Wrangling data

First get boundary of Minneapolis-St Paul urbanized area

This uses the tigris package and filters for UA code 57628

twincities <- urban_areas(cb=TRUE) %>%
  filter(UACE10 == "57628") %>% 
  select(geometry)

Then get ACS tract data for whole state and intersect by boundary

Instead of merely displaying population, normalizing population by tract area (in square miles) will accurately visualize choropleth data

getACSyr <- function(yearACS) {
  temp<-get_acs(geography = 'tract',
                year = yearACS,
                variables = c("B25026_001","B25058_001","B08301_001","B08301_010"),
                state = "MN",
                geometry = TRUE,
                output = "wide") %>%
    mutate(year = yearACS) %>%
    mutate(TotalPop = B25026_001E) %>% 
    # normalize population as expected of choropleths; use square miles
    mutate(PopDens = B25026_001E / as.numeric(st_area(geometry) / 2.59e+6)) %>% 
    mutate(MedRent = B25058_001E) %>% 
    mutate(PctTransit = B08301_010E / B08301_001E * 100) %>% 
    select(!(B25026_001E:B08301_010M))
  
  st_join(temp, twincities, join=st_within, left=FALSE) %>% 
    st_transform('EPSG:2812')
}

tracts09 <- getACSyr(2009)
tracts19 <- getACSyr(2019)

Here we wrangle in LEHD Workforce Area Characteristics, which has job counts by block group. Then summarize by tract. ## Wrangle in LEHD WAC

lehdwac08 <- read_csv("data/mn_wac_S000_JT00_2008.csv") %>% 
  select(w_geocode,C000) %>%
  mutate(GEOID = str_sub(w_geocode,1,11)) %>%
  group_by(GEOID) %>% 
  summarise(jobs=sum(C000))
lehdwac18 <- read_csv("data/mn_wac_S000_JT00_2018.csv") %>% 
  select(w_geocode,C000) %>%
  mutate(GEOID = str_sub(w_geocode,1,11)) %>%
  group_by(GEOID) %>% 
  summarise(jobs=sum(C000))

like population, the choropleth should normalize jobs by square mile area

tracts09 <- tracts09 %>% left_join(lehdwac08,by="GEOID") %>% 
  mutate(JobDens = jobs / as.numeric(st_area(geometry) / 2.59e+6))
tracts19 <- tracts19 %>% left_join(lehdwac18,by="GEOID") %>% 
  mutate(JobDens = jobs / as.numeric(st_area(geometry) / 2.59e+6))

allTracts <- rbind(tracts09,tracts19)

Wrangle rail stops and centroid buffers

The Twin Cities’ transit agency is Metro Transit, which operates two light rail lines: the Blue Line and Green Line. The GTFS package here is a snapshot taken from the Transitland GTFS repository, with the Blue and Green Lines as route 901 and 902.

Four buffers were created from station point data: one half-mile buffer for each station, a unionized shapefile of the 1/2 mile buffers, a quarter-mile buffer for finer analysis, and a 1-foot buffer to utilize in multipleRingBuffer() function.

metrotransit <- read_gtfs("data/gtfs.zip")

service_id <- filter(metrotransit$calendar, monday==1) %>% pull(service_id)
route_id <- filter(metrotransit$routes, route_id==c('901','902')) %>% pull(route_id)
railStops <- filter_stops(metrotransit,service_id,route_id) %>% 
  group_by(stop_name) %>% slice(1) %>% stops_as_sf(2812)
railLines <- get_route_geometry(gtfs_as_sf(metrotransit), route_id, service_id)

# include one-foot buffer for multipleRingBuffer
railBuffers <- bind_rows(
  st_buffer(railStops,2640) %>% 
    mutate(Legend = "Buffer") %>% 
    dplyr::select(stop_name,Legend),
  st_union(st_buffer(railStops,2640)) %>% 
    st_sf() %>% 
    mutate(Legend = "Unionized Buffer"),
  st_union(st_buffer(railStops,1320)) %>% 
    st_sf() %>% 
    mutate(Legend = "Quarter-mile Buffer"),
  st_union(st_buffer(railStops,1)) %>% 
    st_sf() %>% 
    mutate(Legend = "1-foot Buffer")
)

buffer <- filter(railBuffers, Legend=="Unionized Buffer")
quarter <- filter(railBuffers,Legend=="Quarter-mile Buffer")
onefoot <- filter(railBuffers, Legend=="1-foot Buffer")

Select TOD tracts by buffer using centroid method

allTracts.group <- 
  rbind(
    st_centroid(allTracts)[buffer,] %>%
      st_drop_geometry() %>%
      left_join(allTracts) %>%
      st_sf() %>%
      mutate(TOD = "TOD"),
    st_centroid(allTracts)[buffer, op = st_disjoint] %>%
      st_drop_geometry() %>%
      left_join(allTracts) %>%
      st_sf() %>%
      mutate(TOD = "Non-TOD")) %>%
  mutate(MedRent = ifelse(year == 2009, MedRent * 1.275, MedRent))

Outputs of TOD indicators

Five indicators were included in the analysis: population density, employment density by workplace, median rent, and percent of commuters that took transit to work, or its mode share.

Population density and employment density were summarized on the tract level and divided by the geometric area of each tract in square miles. Transit mode share was calculated by dividing the number of estimated transit riders by estimated total commuters, all in the ACS 5-year estimates.

Employment data was found in the Census Bureau’s Longitudinal Employer-Household Dynamics datasets, for workplace area characteristics.

palette5 <- c("#f0f9e8","#bae4bc","#7bccc4","#43a2ca","#0868ac")

Define TOD and non-TOD tracts

Here we show what tracts in the urbanized area are considered as classified as a TOD-containing tract. The extent of the Blue and Green Lines are shown as well. Both lines originate in downtown Minneapolis; the Blue Line goes south to the airport, while the Green Line travels in a denser corridor to St. Paul.

ggplot() +
  geom_sf(data=allTracts.group, aes(fill = TOD), color="transparent") +
  geom_sf(data=railLines, aes(color = route_id),size=1) +
  facet_wrap(~year) +
  scale_fill_manual(name = "Census tract type",
                    values = c("#5ab4ac", "#d8b365")) +
  scale_color_manual(name = "Light rail line",
                     values = c('blue', 'darkgreen'),
                     labels = c('Blue Line', 'Green Line')) + 
  mapTheme() +
  labs(title = "Census tracts by transit-oriented development classification",
       subtitle = "Minneapolis-St. Paul, MN-WI Urbanized Area",
       caption = "Note: Green Line did not open until 2014")

Step 2: Time-space indicator maps

plotStep2 <- function(indicator, indTitle, units) {
  ggplot() +
    geom_sf(data=allTracts.group, aes(fill = q5(eval(parse(text=indicator)))), color="transparent") +
    geom_sf(data=buffer, color = "red", fill = "transparent") +
    scale_fill_manual(values = palette5,
                      labels = qBr(allTracts.group, indicator),
                      name = paste0(indTitle,"\n",units,"\n(Quintile breaks)")) +
    labs(title = paste0(indTitle,", 2009-2019"),
         subtitle = "Minneapolis-St. Paul, MN-WI Urbanized Area",
         caption = "Red border = 1/2 mile buffer around light rail stations") + 
    mapTheme() + 
    facet_wrap(~year)
}

The map and data of population density suggest that the urbanized area did not have significant population density increases from 2009 to 2019. In Non-TOD, there is seldom changes in population density. As for TOD, there is 15% increase in population density.

plotStep2("PopDens","Population density","(per sq. mi)")

The map and data of employment density suggest a difference in employment density at NON-TOD and TOD. Areas close to transit increased employment by around 40%; however, there was significant missing data in 2009 that could have affected this. The density in non-transit areas increased by 13%.

plotStep2("JobDens","Employment density","(per sq. mi)")

The map and data of median rent suggests the rent in most areas didn’t change significantly. But the map suggests that the rent at the northeastern inter section (which is downtown Minneapolis) between blue line and green line has increased by 200 dollars.

plotStep2("MedRent", "Median rent","($2019)")

The map and data of transit mode share suggest people live in TOD has increased by 1% of their transit mode share percent. However, people in non-transit area didn’t have any changes in their transit mode sharing.

plotStep2("PopDens","Population density","(per sq. mi)")

plotStep2("JobDens","Employment density","(per sq. mi)")

plotStep2("MedRent", "Median rent","($2019)")

plotStep2("PctTransit", "Transit mode share","(%)")

Step 3 & 4: Grouped bar plot and table

allTracts.Summary <- 
  st_drop_geometry(allTracts.group) %>%
  group_by(year, TOD) %>%
  summarize(Pop_Density = mean(PopDens, na.rm = T),
            Job_Density = mean(JobDens, na.rm = T),
            Median_Rent = mean(MedRent, na.rm = T),
            Pct_Transit = mean(PctTransit, na.rm = T)) %>% 
  gather(Variable, Value, -year, -TOD)

ggplot(allTracts.Summary, aes(factor(year), Value, fill = TOD)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(name = "Census tract type", values = c("#bae4bc", "#0868ac")) +
  facet_wrap(~Variable, scales = "free", ncol=2) +
  labs(title = "Comparing TOD and non-TOD Census tracts, 2009 and 2019",
       subtitle = "Minneapolis-St. Paul, MN-WI Urbanized Area") +
  plotTheme() + theme(legend.position = "bottom",
                      axis.title = element_blank())

# table using kable
allTracts.Summary %>%
  unite(year.TOD, year, TOD, sep = ": ", remove = T) %>%
  mutate(Value = round(Value, 2)) %>% 
  spread(year.TOD, Value) %>% 
  kable(caption = "Comparing TOD and non-TOD Census tracts, 2009 and 2019") %>%
  kable_material("striped", full_width = F, html_font = "Helvetica") %>%
  footnote(footnote_as_chunk = T,
           general_title = "",
           general = "Minneapolis-St. Paul, MN-WI Urbanized Area")
Comparing TOD and non-TOD Census tracts, 2009 and 2019
Variable 2009: Non-TOD 2009: TOD 2019: Non-TOD 2019: TOD
Job_Density 1579.25 6328.69 1783.81 8755.36
Median_Rent 1081.90 880.73 1138.66 906.10
Pct_Transit 5.34 12.67 5.22 13.44
Pop_Density 3866.91 7439.57 4111.53 8622.07
Minneapolis-St. Paul, MN-WI Urbanized Area
# table using gt
allTracts.Summary %>% 
  unite(year.TOD, year, TOD, sep = ": ", remove = T) %>%
  mutate(Value = round(Value, 2)) %>% 
  spread(year.TOD, Value) %>% 
  gt(rowname_col = "Variable") %>% 
  tab_spanner_delim(delim = ':') %>%
  tab_header(title = "Comparing TOD and non-TOD Census tracts, 2009 and 2019",
             subtitle = "Minneapolis-St. Paul, MN-WI Urbanized Area") %>% 
  opt_align_table_header(align="left") %>% 
  tab_style(style = list(cell_text(align = "right")),
            locations = cells_stub(rows = TRUE)) %>%
  tab_options(table.font.names = 'Helvetica',
              stub.font.weight = 'bold')
Comparing TOD and non-TOD Census tracts, 2009 and 2019
Minneapolis-St. Paul, MN-WI Urbanized Area
2009 2019
Non-TOD TOD Non-TOD TOD
Job_Density 1579.25 6328.69 1783.81 8755.36
Median_Rent 1081.90 880.73 1138.66 906.10
Pct_Transit 5.34 12.67 5.22 13.44
Pop_Density 3866.91 7439.57 4111.53 8622.07

Step 5: graduated symbol maps

The graduated symbol map of population shows that there is a larger population near the junction of two rail lines, and more people live near the Green Line than the Blue Line. In contrast, median rent within 0.5 mile of Blue Line stations is higher than that of Green Line. The result shows that rent and population are inversely related in the study region.

stationStat <- st_join(filter(railBuffers,Legend=="Buffer"),
                       st_centroid(filter(allTracts,year==2019))) %>% 
  st_drop_geometry() %>% 
  group_by(stop_name) %>% 
  summarise(Pop = sum(TotalPop, na.rm=T), Rent = median(MedRent, na.rm=T)) %>% 
  full_join(select(railStops,stop_name,geometry),by="stop_name")

ggplot() +
  geom_sf(data=filter(allTracts.group,TOD=="TOD"), color="#666666", lwd=.1) +
  geom_sf(data=stationStat, aes(geometry=geometry, size=Pop, color=Pop), alpha=0.7) +
  scale_size_binned() + scale_color_gradient(low="#bae4bc",high="#0868ac") +
  guides(size=guide_legend(), color=guide_legend()) + mapTheme() +
  labs(title="Light rail stations by total population within 1/2 mile (2019)",
       subtitle="Minneapolis-St. Paul, MN-WI Urbanized Area")

ggplot() +
  geom_sf(data=filter(allTracts.group,TOD=="TOD"), color="#666666", lwd=.1) +
  geom_sf(data=stationStat, aes(geometry=geometry, size=Rent, color=Rent), alpha=0.7) +
  scale_size_binned() + scale_color_gradient(low="#bae4bc",high="#0868ac") +
  guides(size=guide_legend(), color=guide_legend()) + mapTheme() + 
  labs(title="Light rail stations by median rent within 1/2 mile (2019)",
       subtitle="Minneapolis-St. Paul, MN-WI Urbanized Area")

Step 6: geom_line & multiple ring buffer

The line chart of the year 2019 shows the trend that the mean rent increases with distance within 3.5 miles and goes down with distance outside the radius of 3.5 miles, while the mean rent continues to increase with distance according to the plot of 2009. This indicates the peak of housing rent moves closer to rail stations during these years. The plots of 2009 and 2019 share the same characteristic that the rent near the rail stops is pretty low, probably due to the noises and insecurity. The lines are really new, maybe

ggplot() +
  geom_sf(data=multipleRingBuffer(onefoot, 50000, 2640)) +
  geom_sf(data=twincities,fill="transparent",color="black",size=1) +
  geom_sf(data=railStops,size=1) +
  mapTheme() +
  labs(title="Half-mile buffers around light rail stations",
       subtitle="Minneapolis-St. Paul, MN-WI Urbanized Area")

allTracts.rings <-
  st_join(st_centroid(dplyr::select(allTracts, GEOID, year)), 
          multipleRingBuffer(onefoot, 50000, 2640)) %>%
  st_drop_geometry() %>%
  left_join(select(allTracts, GEOID, MedRent, year), 
            by=c("GEOID"="GEOID", "year"="year")) %>%
  st_sf() %>%
  mutate(distance = distance / 5280) %>% 
  st_drop_geometry() %>% 
  group_by(year, distance) %>% 
  summarise(RingRent = mean(MedRent, na.rm=T)) %>% 
  arrange(year, distance)

ggplot(allTracts.rings, aes(x=distance,y=RingRent,color=factor(year))) +
  geom_line() +
  geom_point() +
  scale_color_manual(name="Year",values = c("#bae4bc", "#0868ac")) +
  scale_x_continuous(breaks=seq(0,6,by=1)) +
  plotTheme() +
  labs(title="Rent as a function of distance from light rail stations",
       subtitle="Minneapolis-St. Paul, MN-WI Urbanized Area",
       x="Radial distance from station (miles)",
       y="Average rent ($)")

Step 7: 311 data for Minneapolis

311 data was only available for Minneapolis. Crime data exists for St. Paul but was not geocoded, and only had block address data; geocoding would take too long.

The plot shows a higher 311 requests density in the central region of Minneapolis, approximately the junction of two rail lines. However, we can hardly draw a conclusion of any relationship between TOD and the density of 311 requests from the given data.

data311 <- read_csv("data/Public_311_2019.csv") %>% 
  st_as_sf(coords = c('Lon', 'Lat'), crs = 4326) %>% 
  st_transform(st_crs(allTracts))

choro311 <- st_join(filter(allTracts,year==2019), data311, join = st_intersects, left=F) %>%
  group_by(GEOID) %>% 
  summarise(tot311 = n()) %>% 
  mutate(dens311 = tot311 / as.numeric(st_area(geometry) / 2.59e+6))

ggplot() +
  geom_sf(data=choro311, aes(fill=q5(dens311)), color = "transparent") + 
  geom_sf(data=quarter, color = "red", fill = "transparent") +
  coord_sf(crs=st_crs(4326),xlim=c(-93.36,-93.15),ylim=c(44.82,45.1), expand=FALSE) +
  scale_fill_manual(values = palette5,
                  labels = qBr(choro311, "dens311"),
                  name = paste0("311 requests\n(per sq. mi.)\n(Quintile breaks)")) +
  mapTheme() +
  labs(title = "Density of 311 requests in Minneapolis",
       subtitle = "Red border = 1/4 mile buffer around light rail stations")